#!/usr/bin/env bash
# Launch MLM (masked language model) pre-training/fine-tuning of a BERT-style
# model on the POI corpus, starting from the chinese-roberta-wwm-ext checkpoint.
#
# Requires: run_lm_finetune.py, ./chinese-roberta-wwm-ext/ (model + config),
#           ./pretrain_dataset/{train,validation}.txt (one sample per line).
# Outputs:  checkpoints under ./pretrained_models/poi-bert, logs under
#           ./pretrain_logging.
set -euo pipefail

# Use all 8 local GPUs for this run.
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
python run_lm_finetune.py \
  --output_dir ./pretrained_models/poi-bert \
  --model_type bert \
  --model_name_or_path ./chinese-roberta-wwm-ext \
  --config_name ./chinese-roberta-wwm-ext \
  --do_train \
  --do_eval \
  --learning_rate 1e-4 \
  --num_train_epochs 8 \
  --seed 64 \
  --per_device_train_batch_size 120 \
  --per_device_eval_batch_size 120 \
  --dataloader_num_workers 8 \
  --train_file ./pretrain_dataset/train.txt \
  --validation_file ./pretrain_dataset/validation.txt \
  --line_by_line \
  --warmup_ratio 0.1 \
  --logging_dir ./pretrain_logging \
  --load_best_model_at_end \
  --save_total_limit 100 \
  --max_seq_length 64 \
  --save_strategy steps \
  --evaluation_strategy steps \
  --overwrite_output_dir \
  --logging_strategy steps \
  --mlm_probability 0.25 \
  --max_eval_samples 300000 \
  --save_steps 1000 \
  --eval_steps 1000 \
  --fp16
# NOTE: --load_best_model_at_end requires save/eval strategies and step
# intervals to match, which they do here (steps / 1000).
# The final argument intentionally has no trailing backslash: the original
# ended with "--fp16 \" followed by blank lines, a dangling continuation that
# breaks silently if anything is appended after it.